
sink(here("..", "results", "log_SLX_minmax.txt"))

######################################################################
######################################################################
######################################################################
######  This file contains code to run simulations for the      ######
######  SLX DGP estimated with wrong W and min-max normalization #####
######     in Betz, Cook, Hollenbach 2019                       ######
#### Bias due to network misspecification under spatial dependence ###
######################################################################
######################################################################



tic("SLX_minmax")


set.seed(12345)



#### this function runs the spatial models and records coefficients for each simulated data set given the range of misspecified W
spatial_model_run <- function(iteration, true_data, true_W, estimation_W_array){
  ##### W for estimation
  est_W  <-  estimation_W_array[,, iteration]
  #### estimate slx model, record row with NAs if model does not run
  draw <-  tryCatch({
    slx_model <- lm(y ~ x + est_W%*%x ,data = true_data)
    data.frame(t(slx_model$coefficients))
  },
  error=function(cond){
    draw <- data.frame(NA_real_,NA_real_, NA_real_)
    names(draw)<- c("intercept", "x", "lagx")
    return(draw)
  }
  )
  #### return results and cov(x,WX)
  draw$method = "SLX"
  draw$covXWX  <- cov(true_data$x,   est_W %*%true_data$x)
  names(draw) <- c("intercept", "x", "estimate_theta_x", "method", "covXWX")
  return(data.frame(iteration, draw[,c("x", "estimate_theta_x", "method", "covXWX")]))
}


simulation_run = function(theta_x, rho_x, dgp_W_norm, estimation_W_norm, x_draw){
  N_obs = length(x_draw)

  #### create pre-multiplier for spatial process
  inv_x <- solve(diag(N_obs) - (rho_x *(dgpW_norm)))

  #### random error
  u <- rnorm(N_obs)

  ####spatial x
  x <- inv_x%*%x_draw

  ### spatial lag
  lx  <- (dgpW_norm%*%x)
  #### generate y according to DGP
  y = (-2 + x*beta + theta_x*lx  + u)
  #### save data
  dgp_data <- data.frame(y, x)


  ##### run standard OLS regression
  lm_res <- lm(y ~  x, data =  dgp_data)

  ##### save OLS result
  result_ols <- data.frame(NA_real_, lm_res$coef["x"],NA_real_,"LM", NA_real_, theta_x, rho_x)
  names(result_ols) <- c("extend_misspecified", "coef", "estimate_theta_x", "method", "covXWX", "theta_x", "rho_x")
  result_ols  <- result_ols %>%
    mutate(method = as.character(method))

  ##### range of misspecification probabilities
  ##### run spatial models on extend of misspecification probs
  iterator  <- seq(1:dim(estimation_W_norm)[3])

  result_spatial <- iterator %>% map_df(spatial_model_run, true_data = dgp_data, true_W = dgpW_norm, estimation_W_array = estimation_W_norm)
  ##### add columns for theta x and rho x
  result_spatial <- data.frame(result_spatial, theta_x, rho_x)

  ##### same names for all columns and then add results together
  names(result_spatial)  <- c("extend_misspecified", "coef", "estimate_theta_x", "method", "covXWX", "theta_x", "rho_x")
  result_spatial  <- result_spatial %>%
    mutate(method = as.character(method))

  result <- bind_rows(result_ols, result_spatial)
  ###record variance of x
  result$varX  <- var(dgp_data$x)

  return(result)
}

#### parameters to run the sim on
N  <-  c(150)
thetax <- c(0, 1, 2, 3)
rhox <- seq(0, 0.6, 0.3)
#### put parameters into data frame, rename and order by N, theta.x, rho x
simulation_parameter  <-  data.frame(do.call(rbind,replicate(2000,as.matrix(expand.grid(N, thetax, rhox)),simplify = F))) %>%
      rename(N = Var1, theta_x = Var2, rho_x = Var3) %>%
      mutate(N = as.double(as.character(N)),
             rho_y = as.double(as.character(theta_x)),
             rho_x = as.double(as.character(rho_x))) %>%
      arrange(N, theta_x, rho_x)

    ########### Fix parameters for simulation, X, W_true, W_false, beta
    #### coefficient for simulations
beta  <- 2
### simulated coordinates
dgp_points <- cbind(runif(N, 0, 5), runif(N, 0, 5))

###### create 10 nearest neighbors list from points
dgp_knear <- knearneigh(dgp_points, 10, longlat = NULL)
dgp_nb <- knn2nb(dgp_knear)
##### to binary matrix
dgp_W <- nb2mat(dgp_nb, style="B", zero.policy=TRUE)

#### draw points for wrong W
wrong_points <- cbind(runif(N, 0, 5), runif(N, 0, 5))
#### neighbors from wrong points
wrong_knear <- knearneigh(wrong_points, 10, longlat = NULL)
wrong_nb <- knn2nb(wrong_knear)
#### wrong W
wrong_W <-  nb2mat(wrong_nb, style="B", zero.policy=TRUE)

##### draw x vector
random_x <- rnorm(N)


#### misspecification probabilities
misspecification <-  seq(0, 1, 0.05)

#### empty array that will store the misspecification matrices
estimation_W <- array(NA, dim = c(N, N, length(misspecification)))

for(i in 1:length(misspecification)){
  #### create matrix with 0/1 where probability of 1 is the probability of misspecification
  cell_determinant <- matrix(rbinom(N * N, 1,misspecification[i]), ncol = N, nrow = N)
  ### place holder matrix
  mat<-matrix(NA, nrow = N, ncol = N)
  #### cell of estimation W comes from true W if cell in previous matrix was 0, if cell = 1 from wrong W
  mat[cell_determinant == 0] <- dgp_W[cell_determinant == 0]
  mat[cell_determinant == 1] <- wrong_W[cell_determinant == 1]
  ### assign to array
  estimation_W[,, i]  <- mat
}

### min-max normalization
value  <- min(max(colSums(dgp_W)), max(rowSums(dgp_W)))
dgpW_norm <-dgp_W/value

estimation_W_norm <- array(NA, dim = dim(estimation_W))
for(i in 1:dim(estimation_W)[3]){
  value_wrong  <-  min(max(colSums(estimation_W[,, i])), max(rowSums(estimation_W[,, i])))
  estimation_W_norm[,, i]<-estimation_W[,, i]/value_wrong
}

save(dgpW_norm, file = here("..", "data","DGP_W_knn_slx_minmax.rda"))
save(estimation_W_norm, file = here("..", "data","estimation_W_knn_slx_minmax.rda"))



#### how many simulations will be run?
dim(simulation_parameter)
seeds = seq(1,dim(simulation_parameter)[1], 1)


cl <- makeCluster(detectCores())
print(cl)
registerDoParallel(cl)


#### run sims on all parameter combinations

simulation_misspecified_W_slx_minmax <- foreach(i = 1:dim(simulation_parameter)[1],
                                                .verbose =FALSE,
                                                .errorhandling = "stop",
                                                .combine = rbind,
                                                .export = c("simulation_parameter","simulation_run","seeds", "spatial_model_run"),
                                                .packages = c("spdep", "tidyverse", "MASS")
                                                ) %dopar% {
                                                  library(here)
                                                  library(spdep)
                                                  library(tidyverse)
                                                  library(MASS)
                                                  set.seed(seeds[i])
                                                  res = simulation_run(theta_x = simulation_parameter[i,"theta_x"], rho_x = simulation_parameter[i, "rho_x"], dgp_W = dgpW_norm, estimation_W = estimation_W_norm, x_draw = random_x)
                                                  return(res)
                                                  rm(res)
                                                }

names(simulation_misspecified_W_slx_minmax) <- c("prob_misspecified","coef","estimate_theta_x","method", "covXWX", "theta_x", "rho_x", "varX")
simulation_misspecified_W_slx_minmax$prob_misspecified  <-misspecification[simulation_misspecified_W_slx_minmax$prob_misspecified]
save(simulation_misspecified_W_slx_minmax, file = here("..","data","misspecified_W_slx_minmax.rda"))

toc()

stopCluster(cl)

sink()
